Deep Learning: A Simple Example¶
Let’s get back to the Name Gender Classifier.
Prepare Data¶
import numpy as np
import nltk
from nltk.corpus import names
# nltk.download('names')  ## run once if the 'names' corpus is not yet installed
import random
## label 1 = male, 0 = female
labeled_names = ([(name, 1) for name in names.words('male.txt')] +
                 [(name, 0) for name in names.words('female.txt')])
random.shuffle(labeled_names)
Train-Test Split¶
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(labeled_names, test_size = 0.2, random_state=42)
print(len(train_set), len(test_set))
6355 1589
Feature Engineering¶
In deep learning, words or characters are converted into numeric representations (embeddings) that are learned jointly with the model.
In other words, the feature engineering step is largely automatic.
Steps:
Text to integers
Padding each instance to the same length
import tensorflow as tf
import tensorflow.keras as keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.utils import to_categorical, plot_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import SpatialDropout1D
names = [n for (n, l) in train_set]  ## note: this shadows the nltk `names` corpus imported above
labels = [l for (n, l) in train_set]
len(names)
6355
Tokenizer¶
tokenizer = Tokenizer(char_level=True)  ## char_level=True: each character (not word) becomes a token
tokenizer.fit_on_texts(names)           ## build the character-to-index mapping from the training names
Text to Sequences¶
names_ints = tokenizer.texts_to_sequences(names)
print(names[:10])
print(names_ints[:10])
print(labels[:10])
['Kimberlyn', 'Juan', 'Mariska', 'Rudd', 'Kitty', 'Janos', 'Aryn', 'Ana', 'Winny', 'Chevalier']
[[18, 3, 12, 15, 2, 5, 6, 11, 4], [19, 16, 1, 4], [12, 1, 5, 3, 9, 18, 1], [5, 16, 10, 10], [18, 3, 8, 8, 11], [19, 1, 4, 7, 9], [1, 5, 11, 4], [1, 4, 1], [23, 3, 4, 4, 11], [14, 13, 2, 20, 1, 6, 3, 2, 5]]
[0, 1, 0, 1, 0, 1, 0, 0, 0, 1]
Vocabulary¶
# determine the vocabulary size
# (add 1 because the Tokenizer starts indexing at 1; index 0 is reserved for padding)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
Vocabulary Size: 30
tokenizer.word_index
{'a': 1,
'e': 2,
'i': 3,
'n': 4,
'r': 5,
'l': 6,
'o': 7,
't': 8,
's': 9,
'd': 10,
'y': 11,
'm': 12,
'h': 13,
'c': 14,
'b': 15,
'u': 16,
'g': 17,
'k': 18,
'j': 19,
'v': 20,
'f': 21,
'p': 22,
'w': 23,
'z': 24,
'x': 25,
'q': 26,
'-': 27,
' ': 28,
"'": 29}
Padding¶
names_lens = [len(n) for n in names_ints]
names_lens
import seaborn as sns
sns.displot(names_lens)
print(names[np.argmax(names_lens)]) # longest name
Jean-Christophe
max_len = max(names_lens)  ## length of the longest name
max_len
15
## pad_sequences prepends 0s by default (padding='pre'), as the output below shows
names_ints_pad = sequence.pad_sequences(names_ints, maxlen=max_len)
names_ints_pad[:10]
array([[ 0, 0, 0, 0, 0, 0, 18, 3, 12, 15, 2, 5, 6, 11, 4],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 16, 1, 4],
[ 0, 0, 0, 0, 0, 0, 0, 0, 12, 1, 5, 3, 9, 18, 1],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 16, 10, 10],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 3, 8, 8, 11],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 1, 4, 7, 9],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 11, 4],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 1],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 3, 4, 4, 11],
[ 0, 0, 0, 0, 0, 0, 14, 13, 2, 20, 1, 6, 3, 2, 5]],
dtype=int32)
Define X and Y¶
X_train = np.array(names_ints_pad).astype('float32')
y_train = np.array(labels)
## apply the same tokenizer (fitted on the training data) and padding to the test names
X_test = np.array(sequence.pad_sequences(
    tokenizer.texts_to_sequences([n for (n, l) in test_set]),
    maxlen=max_len)).astype('float32')
y_test = np.array([l for (n,l) in test_set])
X_test_texts = [n for (n,l) in test_set]
X_train.shape
(6355, 15)
X_train[2,]
array([ 0., 0., 0., 0., 0., 0., 0., 0., 12., 1., 5., 3., 9.,
18., 1.], dtype=float32)
Model Definition¶
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
## Plotting results
# def plot(history):
# matplotlib.rcParams['figure.dpi'] = 100
# acc = history.history['accuracy']
# val_acc = history.history['val_accuracy']
# loss = history.history['loss']
# val_loss = history.history['val_loss']
# epochs = range(1, len(acc)+1)
# ## Accuracy plot
# plt.plot(epochs, acc, 'bo', label='Training acc')
# plt.plot(epochs, val_acc, 'b', label='Validation acc')
# plt.title('Training and validation accuracy')
# plt.legend()
# ## Loss plot
# plt.figure()
# plt.plot(epochs, loss, 'bo', label='Training loss')
# plt.plot(epochs, val_loss, 'b', label='Validation loss')
# plt.title('Training and validation loss')
# plt.legend()
# plt.show()
def plot(history):
pd.DataFrame(history.history).plot(figsize=(8,5))
plt.grid(True)
#plt.gca().set_ylim(0,1)
plt.show()
Model 1¶
Two fully-connected (dense) layers
from keras import layers
model1 = keras.Sequential()
model1.add(keras.Input(shape=(max_len,)))
model1.add(layers.Dense(128, activation="relu", name="dense_layer_1"))
model1.add(layers.Dense(128, activation="relu", name="dense_layer_2"))
model1.add(layers.Dense(2, activation="softmax", name="output"))  ## two classes: 0 = female, 1 = male
model1.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),  ## integer labels, hence the sparse variant
    optimizer=keras.optimizers.Adam(learning_rate=0.001),  ## `lr` is deprecated in favor of `learning_rate`
    metrics=["accuracy"]
)
plot_model(model1, show_shapes=True )
history1 = model1.fit(X_train, y_train,
batch_size=128,
epochs=50, verbose=2,
validation_split = 0.2)
Epoch 1/50
40/40 - 3s - loss: 0.6426 - accuracy: 0.6764 - val_loss: 0.5912 - val_accuracy: 0.7183
Epoch 2/50
40/40 - 0s - loss: 0.5752 - accuracy: 0.7122 - val_loss: 0.5549 - val_accuracy: 0.7144
Epoch 3/50
40/40 - 0s - loss: 0.5504 - accuracy: 0.7254 - val_loss: 0.5492 - val_accuracy: 0.7231
Epoch 4/50
40/40 - 0s - loss: 0.5319 - accuracy: 0.7307 - val_loss: 0.5299 - val_accuracy: 0.7286
Epoch 5/50
40/40 - 0s - loss: 0.5258 - accuracy: 0.7349 - val_loss: 0.5234 - val_accuracy: 0.7270
Epoch 6/50
40/40 - 0s - loss: 0.5190 - accuracy: 0.7402 - val_loss: 0.5315 - val_accuracy: 0.7254
Epoch 7/50
40/40 - 0s - loss: 0.5092 - accuracy: 0.7469 - val_loss: 0.5171 - val_accuracy: 0.7309
Epoch 8/50
40/40 - 0s - loss: 0.4967 - accuracy: 0.7516 - val_loss: 0.5041 - val_accuracy: 0.7356
Epoch 9/50
40/40 - 0s - loss: 0.4848 - accuracy: 0.7598 - val_loss: 0.5238 - val_accuracy: 0.7356
Epoch 10/50
40/40 - 0s - loss: 0.4879 - accuracy: 0.7498 - val_loss: 0.5286 - val_accuracy: 0.7215
Epoch 11/50
40/40 - 0s - loss: 0.4826 - accuracy: 0.7604 - val_loss: 0.5182 - val_accuracy: 0.7530
Epoch 12/50
40/40 - 0s - loss: 0.4851 - accuracy: 0.7587 - val_loss: 0.5311 - val_accuracy: 0.7419
Epoch 13/50
40/40 - 0s - loss: 0.4721 - accuracy: 0.7638 - val_loss: 0.5045 - val_accuracy: 0.7404
Epoch 14/50
40/40 - 0s - loss: 0.4706 - accuracy: 0.7673 - val_loss: 0.5051 - val_accuracy: 0.7490
Epoch 15/50
40/40 - 0s - loss: 0.4634 - accuracy: 0.7683 - val_loss: 0.5141 - val_accuracy: 0.7419
Epoch 16/50
40/40 - 0s - loss: 0.4631 - accuracy: 0.7628 - val_loss: 0.4988 - val_accuracy: 0.7341
Epoch 17/50
40/40 - 0s - loss: 0.4592 - accuracy: 0.7644 - val_loss: 0.5055 - val_accuracy: 0.7498
Epoch 18/50
40/40 - 0s - loss: 0.4533 - accuracy: 0.7691 - val_loss: 0.5003 - val_accuracy: 0.7427
Epoch 19/50
40/40 - 0s - loss: 0.4558 - accuracy: 0.7677 - val_loss: 0.4885 - val_accuracy: 0.7506
Epoch 20/50
40/40 - 0s - loss: 0.4454 - accuracy: 0.7714 - val_loss: 0.5038 - val_accuracy: 0.7254
Epoch 21/50
40/40 - 0s - loss: 0.4455 - accuracy: 0.7793 - val_loss: 0.5126 - val_accuracy: 0.7506
Epoch 22/50
40/40 - 0s - loss: 0.4410 - accuracy: 0.7720 - val_loss: 0.4942 - val_accuracy: 0.7498
Epoch 23/50
40/40 - 0s - loss: 0.4478 - accuracy: 0.7726 - val_loss: 0.5109 - val_accuracy: 0.7498
Epoch 24/50
40/40 - 0s - loss: 0.4474 - accuracy: 0.7683 - val_loss: 0.4917 - val_accuracy: 0.7427
Epoch 25/50
40/40 - 0s - loss: 0.4415 - accuracy: 0.7760 - val_loss: 0.4905 - val_accuracy: 0.7514
Epoch 26/50
40/40 - 0s - loss: 0.4395 - accuracy: 0.7752 - val_loss: 0.5123 - val_accuracy: 0.7419
Epoch 27/50
40/40 - 0s - loss: 0.4414 - accuracy: 0.7756 - val_loss: 0.5014 - val_accuracy: 0.7624
Epoch 28/50
40/40 - 0s - loss: 0.4361 - accuracy: 0.7679 - val_loss: 0.5028 - val_accuracy: 0.7435
Epoch 29/50
40/40 - 0s - loss: 0.4404 - accuracy: 0.7720 - val_loss: 0.5032 - val_accuracy: 0.7404
Epoch 30/50
40/40 - 0s - loss: 0.4432 - accuracy: 0.7691 - val_loss: 0.4951 - val_accuracy: 0.7467
Epoch 31/50
40/40 - 0s - loss: 0.4315 - accuracy: 0.7750 - val_loss: 0.4973 - val_accuracy: 0.7474
Epoch 32/50
40/40 - 0s - loss: 0.4282 - accuracy: 0.7828 - val_loss: 0.5040 - val_accuracy: 0.7380
Epoch 33/50
40/40 - 0s - loss: 0.4219 - accuracy: 0.7832 - val_loss: 0.4863 - val_accuracy: 0.7545
Epoch 34/50
40/40 - 0s - loss: 0.4200 - accuracy: 0.7866 - val_loss: 0.4929 - val_accuracy: 0.7569
Epoch 35/50
40/40 - 0s - loss: 0.4241 - accuracy: 0.7809 - val_loss: 0.4988 - val_accuracy: 0.7530
Epoch 36/50
40/40 - 0s - loss: 0.4209 - accuracy: 0.7870 - val_loss: 0.5025 - val_accuracy: 0.7396
Epoch 37/50
40/40 - 0s - loss: 0.4185 - accuracy: 0.7823 - val_loss: 0.4848 - val_accuracy: 0.7561
Epoch 38/50
40/40 - 0s - loss: 0.4151 - accuracy: 0.7880 - val_loss: 0.4937 - val_accuracy: 0.7482
Epoch 39/50
40/40 - 0s - loss: 0.4158 - accuracy: 0.7815 - val_loss: 0.4950 - val_accuracy: 0.7522
Epoch 40/50
40/40 - 0s - loss: 0.4089 - accuracy: 0.7905 - val_loss: 0.4926 - val_accuracy: 0.7467
Epoch 41/50
40/40 - 0s - loss: 0.4124 - accuracy: 0.7884 - val_loss: 0.4960 - val_accuracy: 0.7482
Epoch 42/50
40/40 - 0s - loss: 0.4077 - accuracy: 0.7891 - val_loss: 0.4997 - val_accuracy: 0.7364
Epoch 43/50
40/40 - 0s - loss: 0.4030 - accuracy: 0.7937 - val_loss: 0.4910 - val_accuracy: 0.7490
Epoch 44/50
40/40 - 0s - loss: 0.4019 - accuracy: 0.7964 - val_loss: 0.4949 - val_accuracy: 0.7506
Epoch 45/50
40/40 - 0s - loss: 0.4008 - accuracy: 0.7968 - val_loss: 0.4866 - val_accuracy: 0.7459
Epoch 46/50
40/40 - 0s - loss: 0.4033 - accuracy: 0.7960 - val_loss: 0.5027 - val_accuracy: 0.7498
Epoch 47/50
40/40 - 0s - loss: 0.4018 - accuracy: 0.7956 - val_loss: 0.4876 - val_accuracy: 0.7545
Epoch 48/50
40/40 - 0s - loss: 0.4215 - accuracy: 0.7852 - val_loss: 0.4994 - val_accuracy: 0.7467
Epoch 49/50
40/40 - 0s - loss: 0.3989 - accuracy: 0.7962 - val_loss: 0.4932 - val_accuracy: 0.7427
Epoch 50/50
40/40 - 0s - loss: 0.3969 - accuracy: 0.7950 - val_loss: 0.4856 - val_accuracy: 0.7522
plot(history1)
model1.evaluate(X_test, y_test, batch_size=128, verbose=2)
13/13 - 0s - loss: 0.5277 - accuracy: 0.7558
[0.5276997089385986, 0.7558212876319885]
Model 2¶
One Embedding Layer + Two Fully-Connected Dense Layers
EMBEDDING_DIM = 128
model2 = Sequential()
## mask_zero=True marks padding index 0 as masked, so downstream layers
## (such as the pooling layer below) ignore the padded positions
model2.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True))
## GlobalAveragePooling1D returns a fixed-length output vector for each example
## by averaging over the sequence dimension; this is the simplest way for the
## model to handle input of variable length
model2.add(layers.GlobalAveragePooling1D())
model2.add(layers.Dense(128, activation="relu", name="dense_layer_1"))
model2.add(layers.Dense(128, activation="relu", name="dense_layer_2"))
model2.add(layers.Dense(2, activation="softmax", name="output"))
model2.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"]
)
plot_model(model2, show_shapes=True)
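As a quick sanity check (a sketch, not part of the pipeline above), GlobalAveragePooling1D is simply a mean over the time axis; note that because the Embedding layer in model2 sets mask_zero=True, there the average is taken over the non-padded positions only.
## toy input of shape (batch, time, features)
demo = np.arange(12, dtype='float32').reshape(1, 3, 4)
pooled = layers.GlobalAveragePooling1D()(demo)
## without a mask, the pooled output equals the plain mean over axis 1
print(np.allclose(pooled.numpy(), demo.mean(axis=1)))  # True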
history2 = model2.fit(X_train, y_train,
batch_size=128,
epochs=50, verbose=2,
validation_split = 0.2)
Epoch 1/50
40/40 - 1s - loss: 0.6327 - accuracy: 0.6434 - val_loss: 0.5773 - val_accuracy: 0.7113
Epoch 2/50
40/40 - 0s - loss: 0.5739 - accuracy: 0.7036 - val_loss: 0.5572 - val_accuracy: 0.7144
Epoch 3/50
40/40 - 0s - loss: 0.5587 - accuracy: 0.7177 - val_loss: 0.5562 - val_accuracy: 0.7215
Epoch 4/50
40/40 - 0s - loss: 0.5527 - accuracy: 0.7268 - val_loss: 0.5538 - val_accuracy: 0.7270
Epoch 5/50
40/40 - 0s - loss: 0.5463 - accuracy: 0.7321 - val_loss: 0.5729 - val_accuracy: 0.7081
Epoch 6/50
40/40 - 0s - loss: 0.5482 - accuracy: 0.7276 - val_loss: 0.5584 - val_accuracy: 0.7183
Epoch 7/50
40/40 - 0s - loss: 0.5380 - accuracy: 0.7333 - val_loss: 0.5472 - val_accuracy: 0.7317
Epoch 8/50
40/40 - 0s - loss: 0.5326 - accuracy: 0.7384 - val_loss: 0.5491 - val_accuracy: 0.7341
Epoch 9/50
40/40 - 0s - loss: 0.5272 - accuracy: 0.7408 - val_loss: 0.5485 - val_accuracy: 0.7262
Epoch 10/50
40/40 - 0s - loss: 0.5221 - accuracy: 0.7463 - val_loss: 0.5577 - val_accuracy: 0.7223
Epoch 11/50
40/40 - 0s - loss: 0.5162 - accuracy: 0.7498 - val_loss: 0.5463 - val_accuracy: 0.7293
Epoch 12/50
40/40 - 0s - loss: 0.5122 - accuracy: 0.7555 - val_loss: 0.5465 - val_accuracy: 0.7309
Epoch 13/50
40/40 - 0s - loss: 0.5065 - accuracy: 0.7543 - val_loss: 0.5418 - val_accuracy: 0.7317
Epoch 14/50
40/40 - 0s - loss: 0.5057 - accuracy: 0.7581 - val_loss: 0.5531 - val_accuracy: 0.7270
Epoch 15/50
40/40 - 0s - loss: 0.5006 - accuracy: 0.7657 - val_loss: 0.5534 - val_accuracy: 0.7309
Epoch 16/50
40/40 - 0s - loss: 0.4945 - accuracy: 0.7689 - val_loss: 0.5460 - val_accuracy: 0.7341
Epoch 17/50
40/40 - 0s - loss: 0.4884 - accuracy: 0.7718 - val_loss: 0.5480 - val_accuracy: 0.7388
Epoch 18/50
40/40 - 0s - loss: 0.4863 - accuracy: 0.7758 - val_loss: 0.5493 - val_accuracy: 0.7278
Epoch 19/50
40/40 - 0s - loss: 0.4812 - accuracy: 0.7728 - val_loss: 0.5502 - val_accuracy: 0.7270
Epoch 20/50
40/40 - 0s - loss: 0.4752 - accuracy: 0.7825 - val_loss: 0.5525 - val_accuracy: 0.7254
Epoch 21/50
40/40 - 0s - loss: 0.4722 - accuracy: 0.7799 - val_loss: 0.5516 - val_accuracy: 0.7325
Epoch 22/50
40/40 - 0s - loss: 0.4708 - accuracy: 0.7813 - val_loss: 0.5538 - val_accuracy: 0.7309
Epoch 23/50
40/40 - 0s - loss: 0.4636 - accuracy: 0.7840 - val_loss: 0.5533 - val_accuracy: 0.7396
Epoch 24/50
40/40 - 0s - loss: 0.4584 - accuracy: 0.7909 - val_loss: 0.5741 - val_accuracy: 0.7136
Epoch 25/50
40/40 - 0s - loss: 0.4606 - accuracy: 0.7878 - val_loss: 0.5630 - val_accuracy: 0.7215
Epoch 26/50
40/40 - 0s - loss: 0.4580 - accuracy: 0.7821 - val_loss: 0.5575 - val_accuracy: 0.7254
Epoch 27/50
40/40 - 0s - loss: 0.4490 - accuracy: 0.7923 - val_loss: 0.5561 - val_accuracy: 0.7317
Epoch 28/50
40/40 - 0s - loss: 0.4479 - accuracy: 0.7917 - val_loss: 0.5601 - val_accuracy: 0.7341
Epoch 29/50
40/40 - 0s - loss: 0.4453 - accuracy: 0.7927 - val_loss: 0.5580 - val_accuracy: 0.7317
Epoch 30/50
40/40 - 0s - loss: 0.4423 - accuracy: 0.7939 - val_loss: 0.5719 - val_accuracy: 0.7325
Epoch 31/50
40/40 - 0s - loss: 0.4362 - accuracy: 0.7941 - val_loss: 0.5709 - val_accuracy: 0.7278
Epoch 32/50
40/40 - 0s - loss: 0.4361 - accuracy: 0.8004 - val_loss: 0.5579 - val_accuracy: 0.7356
Epoch 33/50
40/40 - 0s - loss: 0.4327 - accuracy: 0.7986 - val_loss: 0.5628 - val_accuracy: 0.7207
Epoch 34/50
40/40 - 0s - loss: 0.4301 - accuracy: 0.8029 - val_loss: 0.5740 - val_accuracy: 0.7175
Epoch 35/50
40/40 - 0s - loss: 0.4278 - accuracy: 0.8066 - val_loss: 0.5702 - val_accuracy: 0.7128
Epoch 36/50
40/40 - 0s - loss: 0.4248 - accuracy: 0.8078 - val_loss: 0.5696 - val_accuracy: 0.7325
Epoch 37/50
40/40 - 0s - loss: 0.4167 - accuracy: 0.8125 - val_loss: 0.5786 - val_accuracy: 0.7278
Epoch 38/50
40/40 - 0s - loss: 0.4175 - accuracy: 0.8057 - val_loss: 0.5764 - val_accuracy: 0.7286
Epoch 39/50
40/40 - 0s - loss: 0.4112 - accuracy: 0.8116 - val_loss: 0.5874 - val_accuracy: 0.7105
Epoch 40/50
40/40 - 0s - loss: 0.4063 - accuracy: 0.8141 - val_loss: 0.5694 - val_accuracy: 0.7246
Epoch 41/50
40/40 - 0s - loss: 0.4100 - accuracy: 0.8163 - val_loss: 0.5826 - val_accuracy: 0.7254
Epoch 42/50
40/40 - 0s - loss: 0.4011 - accuracy: 0.8163 - val_loss: 0.5745 - val_accuracy: 0.7325
Epoch 43/50
40/40 - 0s - loss: 0.3966 - accuracy: 0.8183 - val_loss: 0.5769 - val_accuracy: 0.7238
Epoch 44/50
40/40 - 0s - loss: 0.3985 - accuracy: 0.8157 - val_loss: 0.5891 - val_accuracy: 0.7089
Epoch 45/50
40/40 - 0s - loss: 0.3956 - accuracy: 0.8214 - val_loss: 0.5919 - val_accuracy: 0.7254
Epoch 46/50
40/40 - 0s - loss: 0.3887 - accuracy: 0.8238 - val_loss: 0.5976 - val_accuracy: 0.7333
Epoch 47/50
40/40 - 0s - loss: 0.3853 - accuracy: 0.8267 - val_loss: 0.5894 - val_accuracy: 0.7238
Epoch 48/50
40/40 - 0s - loss: 0.3793 - accuracy: 0.8291 - val_loss: 0.6013 - val_accuracy: 0.7191
Epoch 49/50
40/40 - 0s - loss: 0.3794 - accuracy: 0.8263 - val_loss: 0.6029 - val_accuracy: 0.7231
Epoch 50/50
40/40 - 0s - loss: 0.3743 - accuracy: 0.8332 - val_loss: 0.6209 - val_accuracy: 0.7152
plot(history2)
model2.evaluate(X_test, y_test, batch_size=128, verbose=2)
13/13 - 0s - loss: 0.5622 - accuracy: 0.7495
[0.5622150301933289, 0.7495279908180237]
Check Embeddings¶
Compared to one-hot encodings of characters, embeddings can encode more information about the characteristics of the characters.
We can extract the embedding layer and apply a dimensionality reduction technique (e.g., t-SNE) to see how the embeddings capture the relationships between characters.
ind2char = tokenizer.index_word
[ind2char.get(i) for i in X_test[10]]
[None,
None,
None,
None,
None,
None,
None,
None,
'j',
'e',
'r',
'r',
'o',
'l',
'd']
char_vectors = model2.layers[0].get_weights()[0]
char_vectors.shape
(30, 128)
## note: this overwrites the `labels` list of training labels defined earlier
labels = [char for (ind, char) in tokenizer.index_word.items()]
labels.insert(0, None)  ## index 0 is the padding slot, which has no character
labels
[None,
'a',
'e',
'i',
'n',
'r',
'l',
'o',
't',
's',
'd',
'y',
'm',
'h',
'c',
'b',
'u',
'g',
'k',
'j',
'v',
'f',
'p',
'w',
'z',
'x',
'q',
'-',
' ',
"'"]
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, random_state=0, n_iter=5000, perplexity=2)
np.set_printoptions(suppress=True)
T = tsne.fit_transform(char_vectors)
plt.figure(figsize=(10, 7), dpi=150)
plt.scatter(T[:, 0], T[:, 1], c='orange', edgecolors='r')
for label, x, y in zip(labels, T[:, 0], T[:, 1]):
plt.annotate(label, xy=(x+1, y+1), xytext=(0, 0), textcoords='offset points')
Issues of Word/Character Representations¶
One-hot encodings do not capture semantic relationships between characters.
For deep learning NLP, it is therefore preferable to replace one-hot encodings of words/characters with dense embeddings, which are argued to carry more semantic information about the tokens.
The remaining question is how to train and create better word embeddings. We will come back to this issue later.
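As a minimal sketch of this point, any two distinct one-hot vectors are orthogonal (cosine similarity 0), whereas the learned embeddings extracted above (char_vectors) place some characters closer together than others:
from sklearn.metrics.pairwise import cosine_similarity
onehot = np.eye(vocab_size)
## 'a' (index 1) vs 'e' (index 2): one-hot codes are all equally unrelated ...
print(cosine_similarity(onehot[1:2], onehot[2:3]))  # [[0.]]
## ... while the trained embeddings yield a graded, non-zero similarity
print(cosine_similarity(char_vectors[1:2], char_vectors[2:3]))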
Hyperparameter Tuning¶
Like feature-based ML methods, neural networks come with many hyperparameters, whose values need to be chosen before training.
Typical hyperparameters include:
Number of nodes per layer
Learning rate
We can use the kerastuner module to tune these hyperparameters.
Steps for Keras Tuner
First, wrap the model definition in a function that takes a single hp argument. Inside this function, replace any value we want to tune with a call to a hyperparameter sampling method, e.g. hp.Int() or hp.Choice(). The function should return a compiled model.
Next, instantiate a tuner object, specifying the optimization objective and other search parameters.
Finally, start the search with the search() method, which takes the same arguments as Model.fit() in Keras. When the search is over, we can retrieve the best models and a summary of the results from the tuner.
import kerastuner
## Wrap model definition in a function
## and specify the parameters needed for tuning
def build_model(hp):
    model1 = keras.Sequential()
    model1.add(keras.Input(shape=(max_len,)))
    ## note: both dense layers refer to the same hyperparameter name 'units',
    ## so they share one sampled value; give them distinct names
    ## (e.g., 'units_1', 'units_2') to tune them independently
    model1.add(layers.Dense(hp.Int('units', min_value=32, max_value=128, step=32), activation="relu", name="dense_layer_1"))
    model1.add(layers.Dense(hp.Int('units', min_value=32, max_value=128, step=32), activation="relu", name="dense_layer_2"))
    model1.add(layers.Dense(2, activation="softmax", name="output"))
    model1.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate',
                      values=[1e-2, 1e-3, 1e-4])),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    return model1
# def build_model(hp):
# inputs = keras.Input(shape=(784,))
# x = layers.Dense(
# units=hp.Int('units', min_value=32, max_value=512, step=32),
#         activation='relu')(inputs)
# outputs = layers.Dense(10, activation='softmax')(x)
# model = keras.Model(inputs, outputs)
# model.compile(
# optimizer=keras.optimizers.Adam(
# hp.Choice('learning_rate',
# values=[1e-2, 1e-3, 1e-4])),
# loss='sparse_categorical_crossentropy',
# metrics=['accuracy'])
# return model
## This is to clean up the temp dir from the tuner.
## Every time we re-start the tuner, it's better to keep the temp dir clean.
import os
import shutil
if os.path.isdir('my_dir'):
shutil.rmtree('my_dir')
## Instantiate the tuner
tuner = kerastuner.tuners.RandomSearch(
build_model,
objective='val_accuracy',
max_trials=10,
executions_per_trial=3,
directory='my_dir')
## Check the tuner's search space
tuner.search_space_summary()
Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 128, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
## Start tuning with the tuner
tuner.search(X_train, y_train, validation_split=0.2, batch_size=128)
Trial 9 Complete [00h 00m 03s]
val_accuracy: 0.7191188136736552
Best val_accuracy So Far: 0.7191188136736552
Total elapsed time: 00h 00m 31s
INFO:tensorflow:Oracle triggered exit
## Retrieve the best models from the tuner
models = tuner.get_best_models(num_models=2)
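We can also inspect the winning hyperparameter values directly; a minimal sketch using KerasTuner's get_best_hyperparameters():
## retrieve the HyperParameters object of the best trial
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hp.get('units'), best_hp.get('learning_rate'))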
## Retrieve the summary of results from the tuner
tuner.results_summary()
Results summary
Results in my_dir/untitled_project
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
units: 128
learning_rate: 0.001
Score: 0.7133490641911825
Trial summary
Hyperparameters:
units: 96
learning_rate: 0.01
Score: 0.7125623027483622
Trial summary
Hyperparameters:
units: 96
learning_rate: 0.001
Score: 0.7004982829093933
Trial summary
Hyperparameters:
units: 64
learning_rate: 0.001
Score: 0.6949908137321472
Trial summary
Hyperparameters:
units: 128
learning_rate: 0.0001
Score: 0.6197220087051392
Trial summary
Hyperparameters:
units: 64
learning_rate: 0.0001
Score: 0.5727773408095042
Sequence Models¶
Model 3¶
One Embedding Layer + LSTM + Dense Layer
EMBEDDING_DIM = 128
model3 = Sequential()
model3.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True))
#model3.add(SpatialDropout1D(0.2))
model3.add(LSTM(64))# , dropout=0.2, recurrent_dropout=0.2))
model3.add(Dense(2, activation="softmax"))
model3.compile(
loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),  ## `lr` is deprecated
metrics=["accuracy"]
)
plot_model(model3, show_shapes=True)
history3 = model3.fit(X_train, y_train,
batch_size=128,
epochs=50, verbose=2,
validation_split = 0.2)
Epoch 1/50
40/40 - 6s - loss: 0.6243 - accuracy: 0.6412 - val_loss: 0.5494 - val_accuracy: 0.7293
Epoch 2/50
40/40 - 1s - loss: 0.5036 - accuracy: 0.7606 - val_loss: 0.4463 - val_accuracy: 0.7836
Epoch 3/50
40/40 - 1s - loss: 0.4457 - accuracy: 0.7801 - val_loss: 0.4239 - val_accuracy: 0.8041
Epoch 4/50
40/40 - 1s - loss: 0.4347 - accuracy: 0.7907 - val_loss: 0.4203 - val_accuracy: 0.7946
Epoch 5/50
40/40 - 1s - loss: 0.4294 - accuracy: 0.7939 - val_loss: 0.4139 - val_accuracy: 0.8080
Epoch 6/50
40/40 - 1s - loss: 0.4259 - accuracy: 0.7966 - val_loss: 0.4108 - val_accuracy: 0.8127
Epoch 7/50
40/40 - 1s - loss: 0.4222 - accuracy: 0.8004 - val_loss: 0.4045 - val_accuracy: 0.8135
Epoch 8/50
40/40 - 1s - loss: 0.4190 - accuracy: 0.8025 - val_loss: 0.4051 - val_accuracy: 0.8175
Epoch 9/50
40/40 - 1s - loss: 0.4149 - accuracy: 0.8004 - val_loss: 0.4004 - val_accuracy: 0.8214
Epoch 10/50
40/40 - 1s - loss: 0.4115 - accuracy: 0.8017 - val_loss: 0.3998 - val_accuracy: 0.8143
Epoch 11/50
40/40 - 1s - loss: 0.4075 - accuracy: 0.8009 - val_loss: 0.3972 - val_accuracy: 0.8175
Epoch 12/50
40/40 - 1s - loss: 0.4043 - accuracy: 0.8102 - val_loss: 0.3927 - val_accuracy: 0.8206
Epoch 13/50
40/40 - 1s - loss: 0.4007 - accuracy: 0.8068 - val_loss: 0.3951 - val_accuracy: 0.8183
Epoch 14/50
40/40 - 1s - loss: 0.3969 - accuracy: 0.8078 - val_loss: 0.3899 - val_accuracy: 0.8175
Epoch 15/50
40/40 - 1s - loss: 0.3942 - accuracy: 0.8104 - val_loss: 0.3883 - val_accuracy: 0.8222
Epoch 16/50
40/40 - 1s - loss: 0.3885 - accuracy: 0.8129 - val_loss: 0.3877 - val_accuracy: 0.8230
Epoch 17/50
40/40 - 1s - loss: 0.3855 - accuracy: 0.8127 - val_loss: 0.3896 - val_accuracy: 0.8206
Epoch 18/50
40/40 - 1s - loss: 0.3817 - accuracy: 0.8133 - val_loss: 0.3869 - val_accuracy: 0.8222
Epoch 19/50
40/40 - 1s - loss: 0.3798 - accuracy: 0.8161 - val_loss: 0.3863 - val_accuracy: 0.8167
Epoch 20/50
40/40 - 1s - loss: 0.3759 - accuracy: 0.8242 - val_loss: 0.3802 - val_accuracy: 0.8245
Epoch 21/50
40/40 - 1s - loss: 0.3693 - accuracy: 0.8277 - val_loss: 0.3828 - val_accuracy: 0.8293
Epoch 22/50
40/40 - 1s - loss: 0.3669 - accuracy: 0.8253 - val_loss: 0.3824 - val_accuracy: 0.8198
Epoch 23/50
40/40 - 1s - loss: 0.3629 - accuracy: 0.8299 - val_loss: 0.3823 - val_accuracy: 0.8190
Epoch 24/50
40/40 - 1s - loss: 0.3595 - accuracy: 0.8301 - val_loss: 0.3820 - val_accuracy: 0.8332
Epoch 25/50
40/40 - 1s - loss: 0.3545 - accuracy: 0.8322 - val_loss: 0.3859 - val_accuracy: 0.8277
Epoch 26/50
40/40 - 1s - loss: 0.3492 - accuracy: 0.8287 - val_loss: 0.3833 - val_accuracy: 0.8261
Epoch 27/50
40/40 - 1s - loss: 0.3449 - accuracy: 0.8326 - val_loss: 0.3874 - val_accuracy: 0.8238
Epoch 28/50
40/40 - 1s - loss: 0.3404 - accuracy: 0.8413 - val_loss: 0.3796 - val_accuracy: 0.8277
Epoch 29/50
40/40 - 1s - loss: 0.3344 - accuracy: 0.8393 - val_loss: 0.3824 - val_accuracy: 0.8261
Epoch 30/50
40/40 - 1s - loss: 0.3281 - accuracy: 0.8448 - val_loss: 0.3902 - val_accuracy: 0.8269
Epoch 31/50
40/40 - 1s - loss: 0.3262 - accuracy: 0.8442 - val_loss: 0.3846 - val_accuracy: 0.8253
Epoch 32/50
40/40 - 1s - loss: 0.3207 - accuracy: 0.8485 - val_loss: 0.3854 - val_accuracy: 0.8285
Epoch 33/50
40/40 - 1s - loss: 0.3172 - accuracy: 0.8505 - val_loss: 0.3898 - val_accuracy: 0.8214
Epoch 34/50
40/40 - 1s - loss: 0.3119 - accuracy: 0.8558 - val_loss: 0.3863 - val_accuracy: 0.8261
Epoch 35/50
40/40 - 1s - loss: 0.3056 - accuracy: 0.8552 - val_loss: 0.3890 - val_accuracy: 0.8183
Epoch 36/50
40/40 - 1s - loss: 0.3006 - accuracy: 0.8603 - val_loss: 0.3949 - val_accuracy: 0.8285
Epoch 37/50
40/40 - 1s - loss: 0.2954 - accuracy: 0.8623 - val_loss: 0.3987 - val_accuracy: 0.8198
Epoch 38/50
40/40 - 1s - loss: 0.2913 - accuracy: 0.8672 - val_loss: 0.3932 - val_accuracy: 0.8190
Epoch 39/50
40/40 - 1s - loss: 0.2832 - accuracy: 0.8698 - val_loss: 0.4070 - val_accuracy: 0.8285
Epoch 40/50
40/40 - 1s - loss: 0.2808 - accuracy: 0.8680 - val_loss: 0.3966 - val_accuracy: 0.8285
Epoch 41/50
40/40 - 1s - loss: 0.2732 - accuracy: 0.8723 - val_loss: 0.4122 - val_accuracy: 0.8245
Epoch 42/50
40/40 - 1s - loss: 0.2672 - accuracy: 0.8765 - val_loss: 0.4087 - val_accuracy: 0.8159
Epoch 43/50
40/40 - 1s - loss: 0.2648 - accuracy: 0.8824 - val_loss: 0.4100 - val_accuracy: 0.8151
Epoch 44/50
40/40 - 1s - loss: 0.2579 - accuracy: 0.8836 - val_loss: 0.4197 - val_accuracy: 0.8238
Epoch 45/50
40/40 - 1s - loss: 0.2508 - accuracy: 0.8875 - val_loss: 0.4185 - val_accuracy: 0.8183
Epoch 46/50
40/40 - 1s - loss: 0.2482 - accuracy: 0.8859 - val_loss: 0.4231 - val_accuracy: 0.8120
Epoch 47/50
40/40 - 1s - loss: 0.2428 - accuracy: 0.8914 - val_loss: 0.4277 - val_accuracy: 0.8167
Epoch 48/50
40/40 - 1s - loss: 0.2387 - accuracy: 0.8959 - val_loss: 0.4229 - val_accuracy: 0.8127
Epoch 49/50
40/40 - 1s - loss: 0.2327 - accuracy: 0.8936 - val_loss: 0.4281 - val_accuracy: 0.8206
Epoch 50/50
40/40 - 1s - loss: 0.2290 - accuracy: 0.8971 - val_loss: 0.4390 - val_accuracy: 0.8159
plot(history3)
Model 4¶
One Embedding Layer + Two Stacked LSTM + Dense Layer
EMBEDDING_DIM = 128
model4 = Sequential()
model4.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True))
#model.add(SpatialDropout1D(0.2))
model4.add(LSTM(64, return_sequences=True)) #, dropout=0.2, recurrent_dropout=0.2))
model4.add(LSTM(64))
model4.add(Dense(2, activation="softmax"))
model4.compile(
loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),  ## `lr` is deprecated
metrics=["accuracy"]
)
plot_model(model4,show_shapes=True)
history4 = model4.fit(X_train, y_train,
batch_size=128,
epochs=50, verbose=2,
validation_split = 0.2)
Epoch 1/50
40/40 - 10s - loss: 0.6456 - accuracy: 0.6275 - val_loss: 0.6029 - val_accuracy: 0.6672
Epoch 2/50
40/40 - 1s - loss: 0.5884 - accuracy: 0.6841 - val_loss: 0.5460 - val_accuracy: 0.7364
Epoch 3/50
40/40 - 1s - loss: 0.4911 - accuracy: 0.7620 - val_loss: 0.4315 - val_accuracy: 0.7939
Epoch 4/50
40/40 - 1s - loss: 0.4400 - accuracy: 0.7864 - val_loss: 0.4203 - val_accuracy: 0.7939
Epoch 5/50
40/40 - 1s - loss: 0.4337 - accuracy: 0.7899 - val_loss: 0.4144 - val_accuracy: 0.7994
Epoch 6/50
40/40 - 1s - loss: 0.4296 - accuracy: 0.7952 - val_loss: 0.4101 - val_accuracy: 0.8072
Epoch 7/50
40/40 - 1s - loss: 0.4233 - accuracy: 0.7972 - val_loss: 0.4046 - val_accuracy: 0.8088
Epoch 8/50
40/40 - 1s - loss: 0.4195 - accuracy: 0.7962 - val_loss: 0.4021 - val_accuracy: 0.8175
Epoch 9/50
40/40 - 1s - loss: 0.4145 - accuracy: 0.7970 - val_loss: 0.3953 - val_accuracy: 0.8222
Epoch 10/50
40/40 - 1s - loss: 0.4095 - accuracy: 0.8055 - val_loss: 0.3970 - val_accuracy: 0.8175
Epoch 11/50
40/40 - 1s - loss: 0.4065 - accuracy: 0.8013 - val_loss: 0.3947 - val_accuracy: 0.8167
Epoch 12/50
40/40 - 1s - loss: 0.4009 - accuracy: 0.8025 - val_loss: 0.3954 - val_accuracy: 0.8230
Epoch 13/50
40/40 - 1s - loss: 0.3941 - accuracy: 0.8053 - val_loss: 0.3945 - val_accuracy: 0.8167
Epoch 14/50
40/40 - 1s - loss: 0.3936 - accuracy: 0.8053 - val_loss: 0.3856 - val_accuracy: 0.8238
Epoch 15/50
40/40 - 1s - loss: 0.3871 - accuracy: 0.8108 - val_loss: 0.3823 - val_accuracy: 0.8222
Epoch 16/50
40/40 - 1s - loss: 0.3831 - accuracy: 0.8108 - val_loss: 0.3860 - val_accuracy: 0.8253
Epoch 17/50
40/40 - 1s - loss: 0.3797 - accuracy: 0.8167 - val_loss: 0.3837 - val_accuracy: 0.8183
Epoch 18/50
40/40 - 1s - loss: 0.3778 - accuracy: 0.8210 - val_loss: 0.3851 - val_accuracy: 0.8183
Epoch 19/50
40/40 - 1s - loss: 0.3736 - accuracy: 0.8216 - val_loss: 0.4027 - val_accuracy: 0.8088
Epoch 20/50
40/40 - 1s - loss: 0.3683 - accuracy: 0.8279 - val_loss: 0.3774 - val_accuracy: 0.8277
Epoch 21/50
40/40 - 1s - loss: 0.3613 - accuracy: 0.8297 - val_loss: 0.3832 - val_accuracy: 0.8206
Epoch 22/50
40/40 - 1s - loss: 0.3605 - accuracy: 0.8326 - val_loss: 0.3839 - val_accuracy: 0.8214
Epoch 23/50
40/40 - 1s - loss: 0.3549 - accuracy: 0.8277 - val_loss: 0.3842 - val_accuracy: 0.8214
Epoch 24/50
40/40 - 1s - loss: 0.3508 - accuracy: 0.8316 - val_loss: 0.3832 - val_accuracy: 0.8269
Epoch 25/50
40/40 - 1s - loss: 0.3469 - accuracy: 0.8369 - val_loss: 0.3843 - val_accuracy: 0.8269
Epoch 26/50
40/40 - 1s - loss: 0.3408 - accuracy: 0.8381 - val_loss: 0.3836 - val_accuracy: 0.8253
Epoch 27/50
40/40 - 1s - loss: 0.3359 - accuracy: 0.8403 - val_loss: 0.3832 - val_accuracy: 0.8214
Epoch 28/50
40/40 - 1s - loss: 0.3291 - accuracy: 0.8458 - val_loss: 0.3893 - val_accuracy: 0.8261
Epoch 29/50
40/40 - 1s - loss: 0.3256 - accuracy: 0.8478 - val_loss: 0.3854 - val_accuracy: 0.8277
Epoch 30/50
40/40 - 2s - loss: 0.3222 - accuracy: 0.8501 - val_loss: 0.3886 - val_accuracy: 0.8277
Epoch 31/50
40/40 - 1s - loss: 0.3126 - accuracy: 0.8533 - val_loss: 0.4062 - val_accuracy: 0.8175
Epoch 32/50
40/40 - 1s - loss: 0.3093 - accuracy: 0.8527 - val_loss: 0.3949 - val_accuracy: 0.8222
Epoch 33/50
40/40 - 1s - loss: 0.3002 - accuracy: 0.8582 - val_loss: 0.3987 - val_accuracy: 0.8190
Epoch 34/50
40/40 - 1s - loss: 0.2936 - accuracy: 0.8588 - val_loss: 0.3972 - val_accuracy: 0.8143
Epoch 35/50
40/40 - 1s - loss: 0.2890 - accuracy: 0.8643 - val_loss: 0.4175 - val_accuracy: 0.8222
Epoch 36/50
40/40 - 1s - loss: 0.2783 - accuracy: 0.8698 - val_loss: 0.4109 - val_accuracy: 0.8159
Epoch 37/50
40/40 - 1s - loss: 0.2722 - accuracy: 0.8753 - val_loss: 0.4118 - val_accuracy: 0.8222
Epoch 38/50
40/40 - 1s - loss: 0.2625 - accuracy: 0.8763 - val_loss: 0.4205 - val_accuracy: 0.8238
Epoch 39/50
40/40 - 1s - loss: 0.2576 - accuracy: 0.8824 - val_loss: 0.4374 - val_accuracy: 0.8206
Epoch 40/50
40/40 - 1s - loss: 0.2497 - accuracy: 0.8859 - val_loss: 0.4324 - val_accuracy: 0.8183
Epoch 41/50
40/40 - 1s - loss: 0.2417 - accuracy: 0.8875 - val_loss: 0.4376 - val_accuracy: 0.8183
Epoch 42/50
40/40 - 1s - loss: 0.2395 - accuracy: 0.8853 - val_loss: 0.4498 - val_accuracy: 0.8096
Epoch 43/50
40/40 - 1s - loss: 0.2303 - accuracy: 0.8946 - val_loss: 0.4628 - val_accuracy: 0.8112
Epoch 44/50
40/40 - 1s - loss: 0.2228 - accuracy: 0.8979 - val_loss: 0.4532 - val_accuracy: 0.8167
Epoch 45/50
40/40 - 1s - loss: 0.2212 - accuracy: 0.8967 - val_loss: 0.4700 - val_accuracy: 0.8127
Epoch 46/50
40/40 - 1s - loss: 0.2084 - accuracy: 0.9017 - val_loss: 0.4759 - val_accuracy: 0.8214
Epoch 47/50
40/40 - 1s - loss: 0.2057 - accuracy: 0.9076 - val_loss: 0.4780 - val_accuracy: 0.8080
Epoch 48/50
40/40 - 1s - loss: 0.2002 - accuracy: 0.9068 - val_loss: 0.5026 - val_accuracy: 0.8135
Epoch 49/50
40/40 - 1s - loss: 0.1918 - accuracy: 0.9138 - val_loss: 0.4944 - val_accuracy: 0.8049
Epoch 50/50
40/40 - 1s - loss: 0.1876 - accuracy: 0.9129 - val_loss: 0.5131 - val_accuracy: 0.8057
plot(history4)
Model 5¶
One Embedding Layer + LSTM [hidden state of last time step + cell state of last time step] + Dense Layer
EMBEDDING_DIM = 128
inputs = keras.Input(shape=(max_len,))
x=layers.Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len, mask_zero=True)(inputs)
#x=layers.SpatialDropout1D(0.2)(x)
## with return_sequences=False, the first return value is itself the hidden
## state of the last time step (identical to x_last_h), not the full sequence
x_out, x_last_h, x_c = layers.LSTM(64, dropout=0.2,
                                   recurrent_dropout=0.2,
                                   return_sequences=False, return_state=True)(x)
## LSTM Parameters:
# `return_sequences=True`: return the hidden states of all time steps
# `return_state=True`: additionally return the hidden state and the cell state of the last time step
# When both are set to True, the return values of LSTM are:
# (1) the hidden states of all time steps (when `return_sequences=True`) or the hidden state of the last time step
# (2) the hidden state of the last time step
# (3) the cell state of the last time step
x = layers.Concatenate(axis=1)([x_last_h, x_c])
outputs=layers.Dense(2, activation='softmax')(x)
model5 = keras.Model(inputs=inputs, outputs=outputs, name="name_gender_model")
plot_model(model5, show_shapes=True)
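To make these return values concrete, here is a small shape check (a sketch with a toy input; the sizes are arbitrary):
demo_in = tf.random.uniform((2, max_len, 8))  # (batch, time, features)
seq, h, c = layers.LSTM(4, return_sequences=True, return_state=True)(demo_in)
print(seq.shape, h.shape, c.shape)            # (2, 15, 4) (2, 4) (2, 4)
out, h2, c2 = layers.LSTM(4, return_sequences=False, return_state=True)(demo_in)
## with return_sequences=False, the first return value *is* the last hidden state
print(np.allclose(out.numpy(), h2.numpy()))   # True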
model5.compile(
loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),  ## `lr` is deprecated
metrics=["accuracy"]
)
history5 = model5.fit(X_train, y_train,
batch_size=128,
epochs=50, verbose=2,
validation_split = 0.2)
Epoch 1/50
40/40 - 4s - loss: 0.6023 - accuracy: 0.6621 - val_loss: 0.5197 - val_accuracy: 0.7537
Epoch 2/50
40/40 - 1s - loss: 0.4781 - accuracy: 0.7606 - val_loss: 0.4376 - val_accuracy: 0.7891
Epoch 3/50
40/40 - 1s - loss: 0.4450 - accuracy: 0.7840 - val_loss: 0.4225 - val_accuracy: 0.8096
Epoch 4/50
40/40 - 1s - loss: 0.4348 - accuracy: 0.7925 - val_loss: 0.4229 - val_accuracy: 0.8096
Epoch 5/50
40/40 - 1s - loss: 0.4324 - accuracy: 0.7960 - val_loss: 0.4190 - val_accuracy: 0.8104
Epoch 6/50
40/40 - 1s - loss: 0.4290 - accuracy: 0.7941 - val_loss: 0.4148 - val_accuracy: 0.8104
Epoch 7/50
40/40 - 1s - loss: 0.4275 - accuracy: 0.7996 - val_loss: 0.4135 - val_accuracy: 0.8198
Epoch 8/50
40/40 - 1s - loss: 0.4258 - accuracy: 0.7980 - val_loss: 0.4117 - val_accuracy: 0.8135
Epoch 9/50
40/40 - 1s - loss: 0.4204 - accuracy: 0.8029 - val_loss: 0.4058 - val_accuracy: 0.8198
Epoch 10/50
40/40 - 1s - loss: 0.4201 - accuracy: 0.7996 - val_loss: 0.4035 - val_accuracy: 0.8096
Epoch 11/50
40/40 - 1s - loss: 0.4137 - accuracy: 0.8051 - val_loss: 0.4024 - val_accuracy: 0.8135
Epoch 12/50
40/40 - 1s - loss: 0.4124 - accuracy: 0.8009 - val_loss: 0.4012 - val_accuracy: 0.8175
Epoch 13/50
40/40 - 1s - loss: 0.4110 - accuracy: 0.7994 - val_loss: 0.3979 - val_accuracy: 0.8198
Epoch 14/50
40/40 - 1s - loss: 0.4079 - accuracy: 0.8051 - val_loss: 0.3955 - val_accuracy: 0.8230
Epoch 15/50
40/40 - 1s - loss: 0.4050 - accuracy: 0.8080 - val_loss: 0.3979 - val_accuracy: 0.8198
Epoch 16/50
40/40 - 1s - loss: 0.4054 - accuracy: 0.8070 - val_loss: 0.3948 - val_accuracy: 0.8190
Epoch 17/50
40/40 - 1s - loss: 0.4007 - accuracy: 0.8088 - val_loss: 0.3943 - val_accuracy: 0.8238
Epoch 18/50
40/40 - 1s - loss: 0.3969 - accuracy: 0.8072 - val_loss: 0.3910 - val_accuracy: 0.8198
Epoch 19/50
40/40 - 1s - loss: 0.3956 - accuracy: 0.8104 - val_loss: 0.3896 - val_accuracy: 0.8120
Epoch 20/50
40/40 - 1s - loss: 0.3925 - accuracy: 0.8096 - val_loss: 0.3885 - val_accuracy: 0.8167
Epoch 21/50
40/40 - 1s - loss: 0.3923 - accuracy: 0.8147 - val_loss: 0.3890 - val_accuracy: 0.8127
Epoch 22/50
40/40 - 1s - loss: 0.3877 - accuracy: 0.8175 - val_loss: 0.3862 - val_accuracy: 0.8206
Epoch 23/50
40/40 - 1s - loss: 0.3828 - accuracy: 0.8112 - val_loss: 0.3863 - val_accuracy: 0.8167
Epoch 24/50
40/40 - 1s - loss: 0.3810 - accuracy: 0.8185 - val_loss: 0.3837 - val_accuracy: 0.8222
Epoch 25/50
40/40 - 1s - loss: 0.3801 - accuracy: 0.8127 - val_loss: 0.3851 - val_accuracy: 0.8230
Epoch 26/50
40/40 - 1s - loss: 0.3746 - accuracy: 0.8188 - val_loss: 0.3826 - val_accuracy: 0.8277
Epoch 27/50
40/40 - 1s - loss: 0.3742 - accuracy: 0.8196 - val_loss: 0.3837 - val_accuracy: 0.8261
Epoch 28/50
40/40 - 1s - loss: 0.3717 - accuracy: 0.8220 - val_loss: 0.3826 - val_accuracy: 0.8245
Epoch 29/50
40/40 - 1s - loss: 0.3661 - accuracy: 0.8249 - val_loss: 0.3804 - val_accuracy: 0.8253
Epoch 30/50
40/40 - 1s - loss: 0.3680 - accuracy: 0.8232 - val_loss: 0.3828 - val_accuracy: 0.8222
Epoch 31/50
40/40 - 1s - loss: 0.3601 - accuracy: 0.8279 - val_loss: 0.3815 - val_accuracy: 0.8222
Epoch 32/50
40/40 - 1s - loss: 0.3527 - accuracy: 0.8301 - val_loss: 0.3812 - val_accuracy: 0.8214
Epoch 33/50
40/40 - 1s - loss: 0.3521 - accuracy: 0.8362 - val_loss: 0.3835 - val_accuracy: 0.8277
Epoch 34/50
40/40 - 1s - loss: 0.3526 - accuracy: 0.8312 - val_loss: 0.3845 - val_accuracy: 0.8198
Epoch 35/50
40/40 - 1s - loss: 0.3406 - accuracy: 0.8365 - val_loss: 0.3832 - val_accuracy: 0.8198
Epoch 36/50
40/40 - 1s - loss: 0.3444 - accuracy: 0.8338 - val_loss: 0.3806 - val_accuracy: 0.8253
Epoch 37/50
40/40 - 1s - loss: 0.3396 - accuracy: 0.8342 - val_loss: 0.3888 - val_accuracy: 0.8214
Epoch 38/50
40/40 - 1s - loss: 0.3379 - accuracy: 0.8391 - val_loss: 0.3845 - val_accuracy: 0.8190
Epoch 39/50
40/40 - 1s - loss: 0.3296 - accuracy: 0.8483 - val_loss: 0.3911 - val_accuracy: 0.8183
Epoch 40/50
40/40 - 1s - loss: 0.3289 - accuracy: 0.8423 - val_loss: 0.3852 - val_accuracy: 0.8245
Epoch 41/50
40/40 - 1s - loss: 0.3277 - accuracy: 0.8440 - val_loss: 0.3863 - val_accuracy: 0.8238
Epoch 42/50
40/40 - 1s - loss: 0.3198 - accuracy: 0.8497 - val_loss: 0.3839 - val_accuracy: 0.8206
Epoch 43/50
40/40 - 1s - loss: 0.3134 - accuracy: 0.8556 - val_loss: 0.3894 - val_accuracy: 0.8127
Epoch 44/50
40/40 - 1s - loss: 0.3155 - accuracy: 0.8537 - val_loss: 0.3890 - val_accuracy: 0.8198
Epoch 45/50
40/40 - 1s - loss: 0.3109 - accuracy: 0.8531 - val_loss: 0.3933 - val_accuracy: 0.8214
Epoch 46/50
40/40 - 1s - loss: 0.3115 - accuracy: 0.8554 - val_loss: 0.3914 - val_accuracy: 0.8183
Epoch 47/50
40/40 - 1s - loss: 0.3033 - accuracy: 0.8558 - val_loss: 0.3889 - val_accuracy: 0.8127
Epoch 48/50
40/40 - 1s - loss: 0.3002 - accuracy: 0.8578 - val_loss: 0.3889 - val_accuracy: 0.8198
Epoch 49/50
40/40 - 1s - loss: 0.2972 - accuracy: 0.8605 - val_loss: 0.3963 - val_accuracy: 0.8230
Epoch 50/50
40/40 - 1s - loss: 0.2941 - accuracy: 0.8645 - val_loss: 0.3952 - val_accuracy: 0.8159
plot(history5)
model5.evaluate(X_test, y_test, batch_size=128, verbose=2)
13/13 - 0s - loss: 0.3842 - accuracy: 0.8232
[0.3841722905635834, 0.8231592178344727]
Model 6¶
Adding an Attention layer
Use the hidden state h of the last time step as the query and the cell state c of the last time step as the value
Compute the attention output between them
And use [attention output + hidden state h of the last time step] for the final decision
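For reference, layers.Attention implements dot-product attention: it scores the query against the keys, softmaxes the scores, and returns a weighted sum of the values (keys default to the values). A toy run with hypothetical shapes:
q = tf.random.uniform((1, 2, 4))  # (batch, query_steps, dim)
v = tf.random.uniform((1, 3, 4))  # (batch, value_steps, dim)
out = layers.Attention()([q, v])  # [query, value]
print(out.shape)                  # (1, 2, 4): one blended value vector per query step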
EMBEDDING_DIM = 128
inputs = keras.Input(shape=(max_len,))
x=layers.Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len)(inputs)
#x=layers.SpatialDropout1D(0.2)(x)
x_all_hs, x_last_h, x_last_c = layers.LSTM(64, dropout=0.2,
recurrent_dropout=0.2,
return_sequences=True, return_state=True)(x)
## LSTM Parameters:
# `return_sequences=True`: return the hidden states of all time steps
# `return_state=True`: additionally return the hidden state and the cell state of the last time step
# When both are set to True, the return values of LSTM are:
# (1) the hidden states of all time steps (when `return_sequences=True`) or the hidden state of the last time step
# (2) the hidden state of the last time step
# (3) the cell state of the last time step
atten_out = layers.Attention()([x_last_h, x_last_c])  ## [query, value]: query = last hidden state, value = last cell state
x = layers.Concatenate(axis=1)([x_last_h, atten_out])
outputs=layers.Dense(2, activation='softmax')(x)
model6 = keras.Model(inputs=inputs, outputs=outputs, name="name_gender_attention_model")
plot_model(model6, show_shapes=True)
model6.compile(
loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),  ## `lr` is deprecated
metrics=["accuracy"]
)
history6 = model6.fit(X_train, y_train,
batch_size=128,
epochs=50, verbose=2,
validation_split = 0.2)
Epoch 1/50
40/40 - 4s - loss: 0.6362 - accuracy: 0.6290 - val_loss: 0.5719 - val_accuracy: 0.6994
Epoch 2/50
40/40 - 1s - loss: 0.5220 - accuracy: 0.7400 - val_loss: 0.4661 - val_accuracy: 0.7710
Epoch 3/50
40/40 - 1s - loss: 0.4529 - accuracy: 0.7791 - val_loss: 0.4409 - val_accuracy: 0.7852
Epoch 4/50
40/40 - 1s - loss: 0.4442 - accuracy: 0.7891 - val_loss: 0.4220 - val_accuracy: 0.8072
Epoch 5/50
40/40 - 1s - loss: 0.4356 - accuracy: 0.7911 - val_loss: 0.4156 - val_accuracy: 0.8065
Epoch 6/50
40/40 - 1s - loss: 0.4285 - accuracy: 0.7921 - val_loss: 0.4137 - val_accuracy: 0.8135
Epoch 7/50
40/40 - 2s - loss: 0.4239 - accuracy: 0.7992 - val_loss: 0.4093 - val_accuracy: 0.8065
Epoch 8/50
40/40 - 1s - loss: 0.4203 - accuracy: 0.8007 - val_loss: 0.4077 - val_accuracy: 0.8049
Epoch 9/50
40/40 - 1s - loss: 0.4159 - accuracy: 0.8037 - val_loss: 0.4048 - val_accuracy: 0.8104
Epoch 10/50
40/40 - 1s - loss: 0.4128 - accuracy: 0.7968 - val_loss: 0.3992 - val_accuracy: 0.8190
Epoch 11/50
40/40 - 1s - loss: 0.4117 - accuracy: 0.8019 - val_loss: 0.3989 - val_accuracy: 0.8159
Epoch 12/50
40/40 - 1s - loss: 0.4055 - accuracy: 0.8033 - val_loss: 0.3954 - val_accuracy: 0.8096
Epoch 13/50
40/40 - 1s - loss: 0.4047 - accuracy: 0.8063 - val_loss: 0.3966 - val_accuracy: 0.8104
Epoch 14/50
40/40 - 1s - loss: 0.4010 - accuracy: 0.8053 - val_loss: 0.3889 - val_accuracy: 0.8135
Epoch 15/50
40/40 - 1s - loss: 0.3976 - accuracy: 0.8070 - val_loss: 0.4063 - val_accuracy: 0.8088
Epoch 16/50
40/40 - 1s - loss: 0.3968 - accuracy: 0.8108 - val_loss: 0.3864 - val_accuracy: 0.8159
Epoch 17/50
40/40 - 1s - loss: 0.3942 - accuracy: 0.8080 - val_loss: 0.3876 - val_accuracy: 0.8175
Epoch 18/50
40/40 - 1s - loss: 0.3927 - accuracy: 0.8102 - val_loss: 0.3878 - val_accuracy: 0.8159
Epoch 19/50
40/40 - 1s - loss: 0.3900 - accuracy: 0.8110 - val_loss: 0.3831 - val_accuracy: 0.8190
Epoch 20/50
40/40 - 1s - loss: 0.3873 - accuracy: 0.8124 - val_loss: 0.3852 - val_accuracy: 0.8198
Epoch 21/50
40/40 - 1s - loss: 0.3870 - accuracy: 0.8159 - val_loss: 0.3841 - val_accuracy: 0.8206
Epoch 22/50
40/40 - 1s - loss: 0.3825 - accuracy: 0.8135 - val_loss: 0.3849 - val_accuracy: 0.8285
Epoch 23/50
40/40 - 1s - loss: 0.3818 - accuracy: 0.8167 - val_loss: 0.3838 - val_accuracy: 0.8261
Epoch 24/50
40/40 - 1s - loss: 0.3784 - accuracy: 0.8143 - val_loss: 0.3814 - val_accuracy: 0.8198
Epoch 25/50
40/40 - 1s - loss: 0.3761 - accuracy: 0.8181 - val_loss: 0.3830 - val_accuracy: 0.8222
Epoch 26/50
40/40 - 1s - loss: 0.3721 - accuracy: 0.8179 - val_loss: 0.3831 - val_accuracy: 0.8167
Epoch 27/50
40/40 - 1s - loss: 0.3694 - accuracy: 0.8232 - val_loss: 0.3800 - val_accuracy: 0.8222
Epoch 28/50
40/40 - 1s - loss: 0.3672 - accuracy: 0.8226 - val_loss: 0.3806 - val_accuracy: 0.8285
Epoch 29/50
40/40 - 1s - loss: 0.3635 - accuracy: 0.8204 - val_loss: 0.3800 - val_accuracy: 0.8206
Epoch 30/50
40/40 - 1s - loss: 0.3618 - accuracy: 0.8259 - val_loss: 0.3791 - val_accuracy: 0.8190
Epoch 31/50
40/40 - 1s - loss: 0.3569 - accuracy: 0.8279 - val_loss: 0.3787 - val_accuracy: 0.8245
Epoch 32/50
40/40 - 1s - loss: 0.3591 - accuracy: 0.8222 - val_loss: 0.3848 - val_accuracy: 0.8183
Epoch 33/50
40/40 - 1s - loss: 0.3548 - accuracy: 0.8253 - val_loss: 0.3820 - val_accuracy: 0.8190
Epoch 34/50
40/40 - 1s - loss: 0.3483 - accuracy: 0.8348 - val_loss: 0.3763 - val_accuracy: 0.8190
Epoch 35/50
40/40 - 1s - loss: 0.3448 - accuracy: 0.8336 - val_loss: 0.3839 - val_accuracy: 0.8120
Epoch 36/50
40/40 - 1s - loss: 0.3437 - accuracy: 0.8360 - val_loss: 0.3803 - val_accuracy: 0.8214
Epoch 37/50
40/40 - 1s - loss: 0.3406 - accuracy: 0.8362 - val_loss: 0.3818 - val_accuracy: 0.8190
Epoch 38/50
40/40 - 1s - loss: 0.3364 - accuracy: 0.8424 - val_loss: 0.3863 - val_accuracy: 0.8159
Epoch 39/50
40/40 - 1s - loss: 0.3352 - accuracy: 0.8389 - val_loss: 0.3820 - val_accuracy: 0.8198
Epoch 40/50
40/40 - 1s - loss: 0.3291 - accuracy: 0.8440 - val_loss: 0.3865 - val_accuracy: 0.8261
Epoch 41/50
40/40 - 1s - loss: 0.3251 - accuracy: 0.8444 - val_loss: 0.3829 - val_accuracy: 0.8190
Epoch 42/50
40/40 - 1s - loss: 0.3231 - accuracy: 0.8503 - val_loss: 0.3869 - val_accuracy: 0.8167
Epoch 43/50
40/40 - 1s - loss: 0.3236 - accuracy: 0.8499 - val_loss: 0.3828 - val_accuracy: 0.8190
Epoch 44/50
40/40 - 1s - loss: 0.3160 - accuracy: 0.8470 - val_loss: 0.3805 - val_accuracy: 0.8167
Epoch 45/50
40/40 - 1s - loss: 0.3134 - accuracy: 0.8521 - val_loss: 0.3829 - val_accuracy: 0.8261
Epoch 46/50
40/40 - 1s - loss: 0.3105 - accuracy: 0.8529 - val_loss: 0.3856 - val_accuracy: 0.8198
Epoch 47/50
40/40 - 1s - loss: 0.3086 - accuracy: 0.8554 - val_loss: 0.3934 - val_accuracy: 0.8238
Epoch 48/50
40/40 - 1s - loss: 0.3059 - accuracy: 0.8556 - val_loss: 0.3894 - val_accuracy: 0.8230
Epoch 49/50
40/40 - 1s - loss: 0.3001 - accuracy: 0.8572 - val_loss: 0.3910 - val_accuracy: 0.8167
Epoch 50/50
40/40 - 1s - loss: 0.2988 - accuracy: 0.8548 - val_loss: 0.3875 - val_accuracy: 0.8253
plot(history6)
Explanation¶
from lime.lime_text import LimeTextExplainer
## with char_level=True, LIME perturbs individual characters (rather than words)
## and fits a local surrogate model to estimate each character's contribution
explainer = LimeTextExplainer(class_names=['female','male'], char_level=True)
def model_predict_pipeline(text):
    ## LIME passes perturbed raw strings, so we re-apply the same preprocessing:
    ## tokenize, pad, then return the class probabilities from model6
    _seq = tokenizer.texts_to_sequences(text)
    _seq_pad = keras.preprocessing.sequence.pad_sequences(_seq, maxlen=max_len)
    #return np.array([[float(1-x), float(x)] for x in model.predict(np.array(_seq_pad))])
    return model6.predict(np.array(_seq_pad))
# np.array(sequence.pad_sequences(
# tokenizer.texts_to_sequences([n for (n,l) in test_set]),
# maxlen = max_len)).astype('float32')
reversed_word_index = dict([(index, word) for (word, index) in tokenizer.word_index.items()])  ## equivalent to tokenizer.index_word used above
text_id = 305
X_test[text_id]
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 24., 7., 5.,
1., 13.], dtype=float32)
X_test_texts[text_id]
'Zorah'
' '.join([reversed_word_index.get(i, '?') for i in X_test[text_id]])
'? ? ? ? ? ? ? ? ? ? z o r a h'
print(X_test[22])
print(X_test_texts[22])
[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 4. 10. 5. 2. 11.]
Andrey
X_test_texts[text_id]
'Zorah'
model_predict_pipeline([X_test_texts[text_id]])
array([[0.7218352, 0.2781648]], dtype=float32)
exp = explainer.explain_instance(
X_test_texts[text_id], model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
y_test[text_id]
0
exp = explainer.explain_instance(
'Alvin', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
exp = explainer.explain_instance(
'Michaelis', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
exp = explainer.explain_instance(
'Sidney', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)
exp = explainer.explain_instance(
'Timber', model_predict_pipeline, num_features=100, top_labels=1)
exp.show_in_notebook(text=True)